Kerry Back
from sklearn.compose import TransformedTargetRegressor

# Map the target to a normal distribution by quantiles before fitting the
# forest; TransformedTargetRegressor applies the inverse map to predictions.
transform3 = QuantileTransformer(
    output_distribution="normal"
)
model = TransformedTargetRegressor(
    regressor=RandomForestRegressor(random_state=0),
    transformer=transform3
)

# transform1, poly and transform2 are the feature-side steps; they are
# assumed to be defined earlier (not visible in this excerpt).
pipe = make_pipeline(
    transform1,
    poly,
    transform2,
    model
)

# make_pipeline names each step after its lowercased class name, so the
# forest's max_depth is reached through the wrapper's "regressor" attribute.
param_grid = {
    "transformedtargetregressor__regressor__max_depth": [4, 6, 8]
}

cv = GridSearchCV(
    pipe,
    param_grid=param_grid
)

X = data[["roeq", "mom12m"]]
y = data["ret"]
cv.fit(X, y)
Run GridSearchCV with industry-demeaned versions of the features added:
# Industry-demeaned characteristics: within each "industry" group, subtract
# the group's mean from every observation.
data["roeqx"] = data.groupby("industry")["roeq"].transform(
    lambda x: x - x.mean()
)
data["mom12mx"] = data.groupby("industry")["mom12m"].transform(
    lambda x: x - x.mean()
)

# Features: the raw values, the demeaned values, and the industry column.
X = data[["roeq", "mom12m", "roeqx", "mom12mx", "industry"]]
Then use OneHotEncoder and make_column_transformer as before.
# Same industry demeaning as writing each column out by hand, but driven by
# a list so more characteristics can be added in one place.
chars = ["roeq", "mom12m"]

for char in chars:
    # The new column is named <char>x and holds <char> minus its mean within
    # the row's industry group.
    data[f"{char}x"] = data.groupby("industry")[char].transform(
        lambda x: x - x.mean()
    )

# Raw characteristics followed by their demeaned counterparts.
newchars = chars + [f"{char}x" for char in chars]
X = data[newchars + ["industry"]]